getwd()
## [1] "/Users/alexg/R files/hair_cortisol/skew-normal FINAL"
library(readxl)
library(psych)
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
## method from
## plot.transform scales
## print.transform scales
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:psych':
##
## describe
## The following object is masked from 'package:base':
##
## transform
library(vtable)
## Loading required package: kableExtra
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:psych':
##
## cs
## The following object is masked from 'package:stats':
##
## ar
library(rethinking)
## Loading required package: cmdstanr
## This is cmdstanr version 0.8.0
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/alexg/.cmdstan/cmdstan-2.36.0
## - CmdStan version: 2.36.0
## Loading required package: posterior
## This is posterior version 1.6.1
##
## Attaching package: 'posterior'
## The following object is masked from 'package:dlookr':
##
## entropy
## The following objects are masked from 'package:stats':
##
## mad, sd, var
## The following objects are masked from 'package:base':
##
## %in%, match
## Loading required package: parallel
## rethinking (Version 2.42)
##
## Attaching package: 'rethinking'
## The following objects are masked from 'package:brms':
##
## LOO, stancode, WAIC
## The following objects are masked from 'package:psych':
##
## logistic, logit, sim
## The following object is masked from 'package:stats':
##
## rstudent
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
##
## Attaching package: 'loo'
## The following object is masked from 'package:rethinking':
##
## compare
library(priorsense)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ tidyr::expand() masks reshape::expand()
## ✖ tidyr::extract() masks dlookr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks rethinking::map()
## ✖ reshape::rename() masks dplyr::rename()
## ✖ lubridate::stamp() masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-6.0: type help(sm) for summary information
##
## Attaching package: 'sm'
##
## The following object is masked from 'package:dlookr':
##
## binning
##
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(bayesplot)
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
##
## Attaching package: 'bayesplot'
##
## The following object is masked from 'package:posterior':
##
## rhat
##
## The following object is masked from 'package:brms':
##
## rhat
library(bayestestR)
# Import the spreadsheet as a plain data frame. `col_types` has one entry per
# sheet column, in order; columns marked "skip" are not read.
df <- as.data.frame(
  read_xlsx(
    "hair_cort_dog_all.xlsx",
    col_types = c(
      rep("text", 7), "numeric", "text", "skip",
      "numeric", "skip", "skip", "numeric", "skip"
    )
  )
)
dim(df) # number of rows and columns in the imported data
## [1] 73 11
class(df) # will tell you what data structure has the dataset been assigned
## [1] "data.frame"
head(df)
## number group visit season breed_group coat_colour sex age comorbidity
## 1 c1 stopped v0 winter ret dark Male 43 yes
## 2 c2 stopped v0 autumn mix dark Male 105 yes
## 3 c3 stopped v0 spring ckcs mix Female 117 yes
## 4 c4 stopped v0 summer ret dark Female 108 yes
## 5 c5 stopped v0 summer ret dark Female 110 yes
## 6 c6 stopped v0 winter mix light Female 120 yes
## fat_percent cortisol
## 1 52.21393 4.924220
## 2 38.52059 7.304202
## 3 46.94916 1.590000
## 4 44.46813 0.861570
## 5 39.59363 6.217317
## 6 NA 4.426785
# Keep only the numeric columns for the distribution summaries that follow.
numeric_df <- Filter(is.numeric, df)
# dlookr is attached after psych and masks its describe(), so the bare call
# already resolved to dlookr's version; the namespace is spelled out so the
# source of these summary statistics is unambiguous.
dlookr::describe(numeric_df)
## # A tibble: 3 × 26
## described_variables n na mean sd se_mean IQR skewness kurtosis
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 73 0 95.8 35.6 4.16 44 -0.104 -0.00589
## 2 fat_percent 55 18 40.5 7.82 1.05 7.82 -0.294 1.12
## 3 cortisol 73 0 8.11 16.5 1.93 5.43 4.05 18.7
## # ℹ 17 more variables: p00 <dbl>, p01 <dbl>, p05 <dbl>, p10 <dbl>, p20 <dbl>,
## # p25 <dbl>, p30 <dbl>, p40 <dbl>, p50 <dbl>, p60 <dbl>, p70 <dbl>,
## # p75 <dbl>, p80 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, p100 <dbl>
# dlookr normality plots for each numeric column.
plot_normality(numeric_df)
# Shapiro-Wilk test per column. lapply() walks the data-frame columns directly,
# whereas apply() would first coerce the whole data frame to a matrix.
lapply(numeric_df, shapiro.test)
## $age
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.97361, p-value = 0.1288
##
##
## $fat_percent
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.97956, p-value = 0.4692
##
##
## $cortisol
##
## Shapiro-Wilk normality test
##
## data: newX[, i]
## W = 0.46269, p-value = 6.756e-15
# Normal Q-Q plot of cortisol on the raw scale, with a red reference line
qqnorm(df$cortisol)
qqline(df$cortisol, col = "red")
# Same Q-Q plot after a natural-log transform
qqnorm(log(df$cortisol))
qqline(log(df$cortisol), col = "red")
# Shapiro-Wilk normality test on the log-transformed values
shapiro.test(log(df$cortisol))
##
## Shapiro-Wilk normality test
##
## data: log(df$cortisol)
## W = 0.94725, p-value = 0.004126
summary(df$cortisol)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4141 1.4119 2.3331 8.1089 6.8455 104.6172
summary(log(df$cortisol))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8817 0.3449 0.8472 1.1816 1.9236 4.6503
df$lgCort <- log(df$cortisol)
summary(df$lgCort)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8817 0.3449 0.8472 1.1816 1.9236 4.6503
hist(df$lgCort)
# Breed as a factor built from breed_group, with "mix" as the first level.
df$breed <- factor(
  df$breed_group,
  levels = c("mix", "ckcs", "pug", "ret", "other")
)
head(df$breed)
## [1] ret mix ckcs ret ret mix
## Levels: mix ckcs pug ret other
sumtable(df)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| group | 73 | ||||||
| … completed | 42 | 58% | |||||
| … stopped | 31 | 42% | |||||
| visit | 73 | ||||||
| … v0 | 52 | 71% | |||||
| … v1 | 21 | 29% | |||||
| season | 73 | ||||||
| … autumn | 21 | 29% | |||||
| … spring | 14 | 19% | |||||
| … summer | 22 | 30% | |||||
| … winter | 16 | 22% | |||||
| breed_group | 73 | ||||||
| … ckcs | 7 | 10% | |||||
| … mix | 16 | 22% | |||||
| … other | 26 | 36% | |||||
| … pug | 7 | 10% | |||||
| … ret | 17 | 23% | |||||
| coat_colour | 73 | ||||||
| … dark | 30 | 41% | |||||
| … light | 28 | 38% | |||||
| … mix | 15 | 21% | |||||
| sex | 73 | ||||||
| … Female | 43 | 59% | |||||
| … Male | 30 | 41% | |||||
| age | 73 | 96 | 36 | 16 | 73 | 117 | 182 |
| comorbidity | 73 | ||||||
| … no | 15 | 21% | |||||
| … yes | 58 | 79% | |||||
| fat_percent | 55 | 40 | 7.8 | 18 | 37 | 45 | 61 |
| cortisol | 73 | 8.1 | 16 | 0.41 | 1.4 | 6.8 | 105 |
| lgCort | 73 | 1.2 | 1.2 | -0.88 | 0.34 | 1.9 | 4.7 |
| breed | 73 | ||||||
| … mix | 16 | 22% | |||||
| … ckcs | 7 | 10% | |||||
| … pug | 7 | 10% | |||||
| … ret | 17 | 23% | |||||
| … other | 26 | 36% |
# Raw cortisol by sex.
par(mfrow = c(1, 1))
vioplot(cortisol ~ sex, data = df, col = "firebrick")

# Log cortisol by sex.
par(mfrow = c(1, 1))
vioplot(lgCort ~ sex, data = df, col = "lemonchiffon")

# Log cortisol by breed: violin plot, then a jittered strip chart of the
# individual observations.
par(mfrow = c(1, 1))
vioplot(lgCort ~ breed, data = df, col = "firebrick")
stripchart(
  lgCort ~ breed,
  data = df,
  vertical = TRUE,
  method = "jitter",
  pch = 20,
  col = "steelblue3"
)
# Standardise log cortisol. The source column is named in full: the shortened
# `df$lgC` only resolves through `$` partial matching on the data frame, which
# stops working as soon as another column starting with "lgC" exists.
df$slgCort <- standardize(df$lgCort)
summary(df$slgCort)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.7079 -0.6925 -0.2768 0.0000 0.6142 2.8713
hist(df$slgCort)
# Complete-case copy of the data.
# NOTE(review): na.omit() drops a row when ANY column is NA (fat_percent has
# missing values), although this model only uses slgCort, sex and visit, and
# the default_prior() call below is given `df` rather than `df2` - confirm
# which data set is intended.
df2 <- na.omit(df)
# Initial skew-normal fit; no `prior` argument, so brms defaults apply.
model <- brm(slgCort ~ sex + (1 | visit), family = skew_normal(), data = df2)
# List the default priors brms assigns for this formula and family.
default_prior(slgCort ~ sex + (1 | visit),
              family = skew_normal(),
              data = df)
## prior class coef group resp dpar nlpar lb ub
## normal(0, 4) alpha
## (flat) b
## (flat) b sexMale
## student_t(3, -0.3, 2.5) Intercept
## student_t(3, 0, 2.5) sd 0
## student_t(3, 0, 2.5) sd visit 0
## student_t(3, 0, 2.5) sd Intercept visit 0
## student_t(3, 0, 2.5) sigma 0
## source
## default
## default
## (vectorized)
## default
## default
## (vectorized)
## (vectorized)
## default
In humans, males have higher hair cortisol than females (Binz TM. Forensic Sci Int 2018; 284:33–8. doi: 10.1016/j.forsciint.2017.12.032), but the effect is the opposite in vervet monkeys (Laudenslager ML. Psychoneuroendocrinology 2012; 37:1736–9. doi: 10.1016/j.psyneuen.2012.03.015). There was no effect of sex in a previous dog study (Macbeth BJ. Wildl Soc Bull 2012; 36:747–58. doi: 10.1002/wsb.219); however, that study did find that neutered dogs had decreased hair cortisol. As all dogs in the present study were neutered, an effect of sex is less likely. In Bowland et al., female dogs had higher hair cortisol than male dogs, but all dogs were intact (Bowland JB. Front. Vet. Sci 2020; 7:565346. doi: 10.3389/fvets.2020.565346). Further, this effect was lost when accounting for other effects.
Therefore, use a regularising prior but keep it neutral and broad, to allow the effect to be either way.
NB Bowland found no age effect. They also found a negative effect of BCS on log hair cortisol (beta -0.03). However, BCS ranged from 1-6 (with only 1 BCS 6), so few overweight…. could suggest poor nutrition and health cf obesity. (Bowland JB. Front. Vet. Sci 2020; 7:565346. doi: 10.3389/fvets.2020.565346)
# Set individual priors
prior_int <- set_prior("normal(0, 0.5)", class = "Intercept")
prior_sig <- set_prior("exponential(1)", class = "sigma")
prior_b <- set_prior("normal(0, 1)", class = "b")
prior_sd <- set_prior("normal(0, 1)", class = "sd")
# NOTE(review): prior_alpha is defined here but is NOT included in `priors`
# below, so any model fitted with `priors` falls back to the brms default for
# alpha (listed as normal(0, 4) by default_prior()) - confirm whether leaving
# it out is intentional.
prior_alpha <- set_prior("normal(4, 2)", class = "alpha")
# Combine priors into list
priors <- c(prior_int, prior_sig, prior_b, prior_sd)
# Density of normal(0, 0.5), the prior set on the intercept
x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = 0, sd = 0.5)
plot(y ~ x, type = "l")
# Density of exponential(1), the prior set on sigma
x <- seq(0, 3, length.out = 100)
y <- dexp(x, rate = 1)
plot(y ~ x, type = "l")
# Density of normal(0, 1), the prior set on the b and sd classes
x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = 0, sd = 1)
plot(y ~ x, type = "l")
Increased adapt_delta above 0.8 (0.9999 here, together with a smaller step size and a higher max_treedepth), as the model had divergent transitions.
# Skew-normal model of standardised log cortisol on sex with a varying
# intercept for visit, fitted with the custom priors. Prior draws are sampled
# alongside the posterior and all parameters are saved.
set.seed(666)
model <- brm(
  formula = slgCort ~ sex + (1 | visit),
  data = df,
  family = skew_normal(),
  prior = priors,
  iter = 8000,
  warmup = 2000,
  cores = 4,
  control = list(adapt_delta = 0.9999, stepsize = 0.001, max_treedepth = 15),
  save_pars = save_pars(all = TRUE),
  sample_prior = TRUE
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## 679 | #include <cmath>
## | ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
summary(model)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: slgCort ~ sex + (1 | visit)
## Data: df (Number of observations: 73)
## Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
## total post-warmup draws = 24000
##
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.37 0.36 0.01 1.38 1.00 7099 8729
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.07 0.26 -0.62 0.47 1.00 10572 10953
## sexMale 0.14 0.20 -0.26 0.54 1.00 16874 14950
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.00 0.09 0.84 1.20 1.00 14796 14707
## alpha 3.60 1.53 1.05 7.10 1.00 14090 11016
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(model)
Looking for hairy caterpillars
mcmc_plot(model, type = 'rank_overlay')
Highest posterior density intervals (HPDI) are usually better than the compatibility intervals given in the summary.
# Posterior draws as a matrix with one named column per parameter.
draws <- as.matrix(model)
# 97% highest posterior density interval for the sex effect. The column is
# selected by name (it is the second column, after b_Intercept) so the result
# does not depend on column order.
HPDI(draws[, "b_sexMale"], 0.97)
## |0.97 0.97|
## -0.2962279 0.6009676
bayes_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
## Estimate Est.Error Q1.5 Q50 Q98.5
## R2 0.02487013 0.02556297 0.000220508 0.01677636 0.105431
loo_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
## Estimate Est.Error Q1.5 Q50 Q98.5
## R2 -0.01485809 0.02977083 -0.1013518 -0.01143804 0.03774662
Posterior predictive checks: check whether the actual data are similar to data simulated from the fitted model.
pp_check(model, ndraws = 100)
par(mfrow = c(1,1))
pp_check(model, type = "hist", ndraws = 11, binwidth = 0.25) # separate histograms of 11 MCMC draws vs actual data
pp_check(model, type = "error_hist", ndraws = 11) # separate histograms of errors for 11 draws
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pp_check(model, type = "scatter_avg", ndraws = 100) # observed values vs the average of 100 posterior predictive draws
pp_check(model, type = "stat_2d") # scatterplot of two test statistics computed on each posterior predictive draw, with the observed value overlaid
## Using all posterior draws for ppc type 'stat_2d' by default.
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
# PPC functions for predictive checks based on (approximate) leave-one-out (LOO) cross-validation
pp_check(model, type = "loo_pit_overlay", ndraws = 1000)
## NOTE: The kernel density estimate assumes continuous observations and is not optimal for discrete observations.
pp_check(model, type = "error_scatter_avg")
## Using all posterior draws for ppc type 'error_scatter_avg' by default.
pairs(model)
loo_model <- loo(model, moment_match = TRUE)
loo_model
##
## Computed from 24000 by 73 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -101.9 6.2
## p_loo 4.0 0.8
## looic 203.7 12.4
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
First, check how sensitive the posterior is to power-scaling of the prior and of the likelihood, by comparing the base posterior with the posteriors that result from power-scaling.
powerscale_sensitivity(model, variable = c("b_Intercept", "sigma", "b_sexMale"))
## Sensitivity based on cjs_dist
## Prior selection: all priors
## Likelihood selection: all data
##
## variable prior likelihood diagnosis
## b_Intercept 0.036 0.020 -
## sigma 0.041 0.123 -
## b_sexMale 0.004 0.090 -
These values appear similar to what was set for the priors, so seems OK?
check_prior(model, effects = "all")
## Parameter Prior_Quality
## 1 b_Intercept informative
## 2 b_sexMale informative
## 3 sd_visit__Intercept informative
prior <- prior_draws(model)
prior %>% glimpse()
## Rows: 24,000
## Columns: 5
## $ Intercept <dbl> -0.30705975, -0.65318192, 0.92875257, -0.14325756, -0.541652…
## $ b <dbl> -0.06464226, 0.51546262, -0.34237197, -0.63938582, 0.5051729…
## $ sigma <dbl> 1.67504396, 0.38050879, 1.97075232, 1.12274087, 2.20491112, …
## $ alpha <dbl> -1.06373411, 5.30320031, -2.68614051, 1.71938552, -0.1959270…
## $ sd_visit <dbl> 0.20646517, 0.39890415, 0.31625996, 0.89497288, 0.28468060, …
# Lines implied by 50 randomly chosen prior draws: for each draw,
# Intercept + b * male is evaluated at male = 0 and male = 1 and the two
# points are joined.
set.seed(5)
prior %>%
  slice_sample(n = 50) %>%
  rownames_to_column(var = "draw") %>%
  expand_grid(male = c(0, 1)) %>%
  mutate(mu = Intercept + b * male) %>%
  ggplot(mapping = aes(x = male, y = mu)) +
  geom_line(mapping = aes(group = draw), colour = "firebrick", alpha = 0.4) +
  geom_point(colour = "firebrick", size = 2) +
  coord_cartesian(ylim = c(-3, 3)) +
  labs(x = "Sex (male)", y = "log(cort) (std)") +
  theme_bw() +
  theme(panel.grid = element_blank())
Can simulate data just on the priors. Fit model but only consider prior when fitting model. If this looks reasonable, it helps to confirm that your priors were reasonable
# Same formula, family and priors as the main model, but with
# sample_prior = "only" so the fit considers the priors alone.
set.seed(666)
model_priors_only <- brm(
  formula = slgCort ~ sex + (1 | visit),
  data = df,
  family = skew_normal(),
  prior = priors,
  sample_prior = "only"
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## 679 | #include <cmath>
## | ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 5.7e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.57 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.017 seconds (Warm-up)
## Chain 1: 0.015 seconds (Sampling)
## Chain 1: 0.032 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 5e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.021 seconds (Warm-up)
## Chain 2: 0.021 seconds (Sampling)
## Chain 2: 0.042 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 2e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.017 seconds (Warm-up)
## Chain 3: 0.019 seconds (Sampling)
## Chain 3: 0.036 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 3e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.017 seconds (Warm-up)
## Chain 4: 0.017 seconds (Sampling)
## Chain 4: 0.034 seconds (Total)
## Chain 4:
pp_check(model_priors_only, ndraws = 100)
as_draws_df(model) %>%
select(b_Intercept:sigma) %>%
cov() %>%
round(digits = 3)
## Warning: Dropping 'draws_df' class as required metadata was removed.
## b_Intercept b_sexMale sd_visit__Intercept sigma
## b_Intercept 0.068 -0.017 -0.001 0.004
## b_sexMale -0.017 0.042 -0.001 -0.001
## sd_visit__Intercept -0.001 -0.001 0.131 0.000
## sigma 0.004 -0.001 0.000 0.008
NB Uses posterior_predict
# use posterior predict to simulate predictions
ppd <- posterior_predict(model)
dim(ppd)
## [1] 24000 73
# 2 x 2 panel: the observed data followed by three randomly chosen posterior
# predictive datasets, all split by sex.
par(mfrow = c(2, 2))
stripchart(slgCort ~ sex, data = df, vertical = TRUE, method = "jitter",
           col = "steelblue3", pch = 20, main = "Observed")
for (panel in seq_len(3)) {
  # One row of `ppd` is one simulated dataset with a value per observation.
  ppd_draw <- ppd[sample(nrow(ppd), 1), ]
  # ylab is set explicitly; otherwise the axis would be labelled with the
  # deparsed left-hand side of the formula.
  stripchart(ppd_draw ~ sex, data = df, vertical = TRUE, method = "jitter",
             col = "firebrick", pch = 20, main = "PPD", ylab = "slgCort")
}
plot(conditional_effects(model), ask = FALSE)
ce <- conditional_effects(model, effects = "sex")
ce_df <- ce[[1]][c(1, 6:9)]
# Point-and-interval plot of the conditional effect of sex, using the
# estimate and lower/upper bounds extracted from conditional_effects().
ggplot(data = ce_df, mapping = aes(x = sex, y = estimate__, group = 1)) +
  geom_errorbar(
    mapping = aes(ymin = lower__, ymax = upper__),
    width = 0.1,
    linewidth = 1,
    colour = c("#F8766D", "#00BFC4")
  ) +
  geom_point(shape = 21, size = 6, fill = c("#F8766D", "#00BFC4")) +
  theme_bw() +
  labs(
    title = "Conditional effect of sex on hair cortisol",
    y = "Log Hair Cortisol (standardised)",
    x = "Sex"
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey25", size = 12),
    axis.text.y = element_text(color = "grey50", size = 10)
  )
mcmc_plot(model, variable = c(
"b_Intercept",
"sigma",
"b_sexMale"))
mcmc_plot(model,
variable = c("b_sexMale", "prior_b"))
# Density ("areas") plot comparing the sampled prior for the b class with the
# posterior for the sex coefficient, with readable axis labels.
mcmc_plot(
  model,
  variable = c("b_sexMale", "prior_b"),
  type = "areas"
) +
  theme_classic() +
  labs(
    title = "Prior vs posterior distribution for sex effect",
    y = "",
    x = "Possible parameter values"
  ) +
  scale_y_discrete(
    labels = c("prior_b" = "Prior for male", "b_sexMale" = "Posterior for male"),
    limits = c("prior_b", "b_sexMale")
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
# Posterior draws as a matrix; the same matrix feeds both plots below.
posterior <- as.matrix(model)

# Density areas for the main parameters.
mcmc_areas(
  posterior,
  pars = c("b_Intercept", "sigma", "b_sexMale"),
  # arbitrary threshold for shading probability mass
  prob = 0.75
)

# Density area for the sex effect alone, with presentation labels.
mcmc_areas(
  posterior,
  pars = c("b_sexMale"),
  # arbitrary threshold for shading probability mass
  prob = 0.97
) +
  theme_classic() +
  labs(title = "Posterior distribution for sex effect") +
  labs(y = "Density distribution") +
  labs(x = "Possible parameter values") +
  scale_y_discrete(labels = c("b_sexMale" = "Posterior for male")) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
# Describe the posterior with intervals at several credibility levels,
# first for all parameters (including the intercept).
hdi_range <- hdi(model, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
plot(hdi_range, show_intercept = TRUE)

# Same intervals, restricted to the sex effect and styled for presentation.
hdi_range <- hdi(
  model,
  ci = c(0.65, 0.70, 0.80, 0.89, 0.95),
  parameters = "b_sexMale"
)
plot(hdi_range, show_intercept = TRUE) +
  labs(
    title = "Posterior distribution for sex effect",
    y = "Density distribution",
    x = "Possible parameter values"
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
# Posterior draws as a matrix, one column per parameter.
draws <- as.matrix(model)
# Pick the sex effect by name instead of by column position, so a change in
# the parameter order cannot silently select a different parameter.
sex_effect <- draws[, "b_sexMale"]
# Share of posterior draws in which the male effect is positive ...
mean(sex_effect > 0)
## [1] 0.7569583
# ... and in which it is negative.
mean(sex_effect < 0)
## [1] 0.2430417
# 97% highest posterior density interval for the male effect.
HPDI(sex_effect, prob = 0.97)
## |0.97 0.97|
## -0.2962279 0.6009676
# Build a two-row counterfactual dataset from the first row of df:
# both rows are identical except for sex.
first_dog <- df[1, ]
new_data <- rbind(first_dog, first_dog)
# Row 1 is treated as Female, row 2 as Male.
new_data[["sex"]] <- c("Female", "Male")
# Print to check that only the sex column differs between the rows.
new_data
## number group visit season breed_group coat_colour sex age comorbidity
## 1 c1 stopped v0 winter ret dark Female 43 yes
## 2 c1 stopped v0 winter ret dark Male 43 yes
## fat_percent cortisol lgCort breed slgCort
## 1 52.21393 4.92422 1.594166 ret 0.3415375
## 2 52.21393 4.92422 1.594166 ret 0.3415375
# Draw from the posterior predictive distribution for the two counterfactual
# rows (column 1 = Female, column 2 = Male). posterior_predict() returns
# simulated outcomes, not expected values.
pred_means <- posterior_predict(model, newdata = new_data)
# Per-draw difference between the two rows. NOTE: despite the variable name,
# this is Female minus Male (column 1 minus column 2).
differenceMale <- pred_means[, 1] - pred_means[, 2]
par(mfrow = c(2, 2))
# Average difference across draws
mean(differenceMale)
## [1] -0.1214526
# Distribution of the per-draw differences
hist(differenceMale)
# 97% highest posterior density interval of the differences
HPDI(differenceMale, prob = 0.97)
## |0.97 0.97|
## -3.218014 3.233322
# Counterfactual copies of the full dataset: every dog set to Male in one
# copy and to Female in the other, all other columns left as observed.
new_data1 <- df
new_data1$sex <- "Male"
new_data2 <- df
new_data2$sex <- "Female"
# Posterior predictive draws for each copy (rows = draws, columns = dogs).
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
# Per-draw, per-dog difference: Male minus Female.
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)
# Mean difference for each column (dog). colMeans() replaces
# apply(pred_diff, 2, mean), which is the wrong tool for a data frame.
pred_diff_ckcs <- colMeans(pred_diff)
# View histogram of the per-dog mean differences
hist(pred_diff_ckcs)
# Overall mean of the per-dog mean differences
mean(pred_diff_ckcs)
## [1] 0.1360314
# 93% HPDI of the per-dog mean differences.
# NOTE(review): pred_diff_ckcs holds one mean per dog, so this interval
# describes the spread of those means across dogs rather than the posterior
# uncertainty of the sex effect -- confirm this is the intended summary.
HPDI(pred_diff_ckcs, 0.93)
## |0.93 0.93|
## 0.1178648 0.1535624
# Expected values of the outcome for the observed data, from posterior_epred().
pred_slgCort <- posterior_epred(model)
# Average the columns of the draws matrix to get one fitted value each.
av_pred_slgCort <- pred_slgCort %>% colMeans()
# Fitted values plotted against the observed standardised log cortisol.
plot(av_pred_slgCort ~ df$slgCort)
# Counterfactual predictions for each sex at visit v0.
set.seed(666)
nd <- tibble(visit = "v0", sex = c("Female", "Male"))
p1 <-
  predict(model,
          resp = "slgCort",
          newdata = nd) %>%
  data.frame() %>%
  bind_cols(nd) %>%
  ggplot(aes(x = sex, y = Estimate, ymin = Q2.5, ymax = Q97.5)) +
  geom_linerange(aes(ymin = Q2.5, ymax = Q97.5),
                 linewidth = 1, color = "#F8766D", alpha = 3/5) +
  geom_point(size = 5, color = "#F8766D") +
  theme_bw() +
  labs(
    title = "Total counterfactual effect of sex on log hair cortisol",
    y = "Counterfactual estimate of Log Hair Cortisol (std)",
    # The x axis shows sex (the manipulated variable), not visit.
    x = "Manipulated sex"
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5)
  ) +
  # Fixed y range for the plot.
  coord_cartesian(ylim = c(-2.5, 2.5))
plot(p1)
NB: no prior is set for sigma because sigma is itself modelled (sigma ~ sex) in this model.
# Set individual priors
prior_int <- set_prior("normal(0, 1)", class = "Intercept")
prior_b <- set_prior("normal(0, 1)", class = "b")
prior_sd <- set_prior("normal(0, 1)", class = "sd")
# Prior on the skewness parameter of the skew-normal family.
prior_alpha <- set_prior("normal(4, 2)", class = "alpha")
# Combine priors into one object for brm(). prior_alpha is included here:
# it was previously defined but left out, so alpha silently fell back to
# the brms default prior.
priors2 <- c(prior_int, prior_b, prior_sd, prior_alpha)
Increased adapt_delta above the default of 0.8 (0.9999 here, together with a small stepsize and a larger max_treedepth), as there were divergent transitions.
# Fix the R random seed before fitting.
set.seed(666)
# Skew-normal distributional model: the mean depends on sex with a random
# intercept per visit, and sigma is also allowed to differ by sex.
model2 <- brm(
  bf(
    slgCort ~ sex + (1 | visit),
    sigma ~ sex
  ),
  data = df,
  family = skew_normal(),
  prior = priors2,
  # Also sample from the priors, and keep all parameters in the fit.
  sample_prior = TRUE,
  save_pars = save_pars(all = TRUE),
  iter = 8000,
  warmup = 2000,
  cores = 4,
  # Sampler settings chosen to deal with divergent transitions.
  control = list(
    adapt_delta = 0.9999,
    stepsize = 0.001,
    max_treedepth = 15
  )
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## 679 | #include <cmath>
## | ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
# Posterior summary of model2: estimates, 95% intervals, Rhat and ESS.
summary(model2)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.9999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: skew_normal
## Links: mu = identity; sigma = log; alpha = identity
## Formula: slgCort ~ sex + (1 | visit)
## sigma ~ sex
## Data: df (Number of observations: 73)
## Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
## total post-warmup draws = 24000
##
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.45 0.41 0.01 1.54 1.00 5994 7531
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.14 0.38 -0.95 0.66 1.00 7684 8143
## sigma_Intercept -0.10 0.12 -0.33 0.14 1.00 15320 14706
## sexMale 0.27 0.24 -0.19 0.76 1.00 14679 13924
## sigma_sexMale 0.22 0.18 -0.12 0.58 1.00 15520 15379
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## alpha 3.28 1.45 0.75 6.60 1.00 14572 10750
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Leave-one-out cross-validation for model2. The original call passed
# `model`, so loo_model2 actually described the first model.
loo_model2 <- loo(model2, moment_match = TRUE)
loo_model2
##
## Computed from 24000 by 73 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -101.9 6.2
## p_loo 4.0 0.8
## looic 203.7 12.4
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# Attach the LOO criterion to each fitted model, then compare the two
# models on expected log predictive density.
model <- add_criterion(model, criterion = "loo")
model2 <- add_criterion(model2, criterion = "loo")
loo_compare(model, model2)
## elpd_diff se_diff
## model 0.0 0.0
## model2 -0.4 1.3
There is very little difference between the models (elpd_diff = -0.4, se_diff = 1.3), so it is probably better to use the simpler model.